#!/usr/bin/env python
# -*- coding: utf-8 -*-
import json
from pybloom_live import ScalableBloomFilter
import sys
# Python 2 only: `reload` is a builtin there, and reloading `sys` makes
# `sys.setdefaultencoding` callable again so the process-wide default codec
# for implicit str <-> unicode conversion can be switched to UTF-8.
# NOTE(review): neither name exists in Python 3 -- this script must be run
# with a Python 2 interpreter as written.
reload(sys)
sys.setdefaultencoding('utf-8')


# Input with one id per line; every non-blank stripped line is loaded into
# the Bloom filter as-is.
id_file_path = '/mnt5/contact_team/dsf/dsf_s15_在营企业id导出/在营企业nameId导出_2021-11-22_19_dc'
# Input with one "<name>\t<id>" record per line.
ent_file_path = '/mnt5/contact_team/dsf/dsf_s31_无联系方式企业/百度错误数据_2022-01-19_15_dc'


def load_id_filter(id_lines):
    """Build a scalable Bloom filter holding every non-blank line of *id_lines*.

    Each line is stripped of surrounding whitespace before being added.
    Blank lines are skipped so the empty string never becomes a member.

    The filter is probabilistic: with ``error_rate=0.01`` roughly 1% of ids
    that were never added may still test as present.
    """
    # SMALL_SET_GROWTH (== 2): slower, but saves memory.
    id_filter = ScalableBloomFilter(
        initial_capacity=1000000,
        error_rate=0.01,
        mode=ScalableBloomFilter.SMALL_SET_GROWTH,
    )
    for line in id_lines:
        entry = line.strip()
        if entry:
            id_filter.add(entry)
    return id_filter


def split_by_id(ent_lines, id_filter, matched_out, other_out):
    """Route each record of *ent_lines* to one of two outputs by its id.

    A record is a line of the form ``<name>\\t<id>``; the id is everything
    after the first tab. Lines are stripped before processing and blank
    lines are dropped. Records whose id is in *id_filter* are written to
    *matched_out*; all others go to *other_out*.

    A non-blank line with no tab has no id to look up. It is written to
    *other_out* instead of raising ``ValueError`` and aborting the run
    half-way through.

    *id_filter* only needs to support ``in``; the outputs only need
    ``write``.
    """
    for line in ent_lines:
        record = line.strip()
        if not record:
            continue
        _, sep, ent_id = record.partition('\t')
        # `sep` is empty when the line has no tab, i.e. there is no id.
        if sep and ent_id in id_filter:
            matched_out.write(record + '\n')
        else:
            other_out.write(record + '\n')


def main():
    """Split the enterprise file into matched / unmatched output files."""
    with open(id_file_path, 'r') as id_f, \
            open(ent_file_path, 'r') as ent_f, \
            open('new_zy_md_file', 'w') as zy_f, \
            open('new_normal_md_file', 'w') as normal_f:
        id_bloom = load_id_filter(id_f)
        split_by_id(ent_f, id_bloom, zy_f, normal_f)


if __name__ == '__main__':
    main()



